#include "stdafx.h"
#include "AudioStream.h"
#include <mmsystem.h> 

// WaveOut functionality attributed to 
//  msdn tutorial by David Overton

// WaveOut function prototypes (all four are defined further down in this file)
static void CALLBACK waveOutProc(HWAVEOUT, UINT, DWORD, DWORD, DWORD);
static WAVEHDR* allocateBlocks(int size, int count);
static void freeBlocks(WAVEHDR* blockArray);
static void writeAudio(HWAVEOUT hWaveOut, LPSTR data, int size);

// WaveOut variables
// Guards every update of waveFreeBlockCount (device callback vs. writeAudio)
static CRITICAL_SECTION waveCriticalSection;
// Block array returned by allocateBlocks(); assigned in demodThread
static WAVEHDR* waveBlocks;
// Number of blocks not currently queued on the device: incremented by
//   waveOutProc on WOM_DONE, decremented by writeAudio after a write
volatile int waveFreeBlockCount;
// Index into waveBlocks of the block writeAudio fills next
static int waveCurrentBlock;

//
// waveOut device callback
// Every message except WOM_DONE is ignored. On WOM_DONE the int that
//   dwInstance points at (the free block counter handed to waveOutOpen)
//   is incremented under waveCriticalSection.
//
void CALLBACK waveOutProc(HWAVEOUT hwo,
                          UINT uMsg,
                          DWORD dwInstance,
                          DWORD dwParam1,
                          DWORD dwParam2)
{
    if(uMsg == WOM_DONE) {
        // The instance value is the address of the free block counter
        int* counter = (int*)dwInstance;

        // A block finished playing, so one more block is free
        EnterCriticalSection(&waveCriticalSection);
        ++(*counter);
        LeaveCriticalSection(&waveCriticalSection);
    }
}

//
// Fill one block and queue it on the device
//   hWaveOut - open waveOut device handle
//   data     - source bytes to play
//   size     - number of bytes in data
// At most dwBufferLength bytes are copied into the current block; if
//   fewer are supplied the remainder of the block is zeroed, because the
//   device plays the whole block.
// The free block counter is only decremented when the block was really
//   queued, since only a queued block produces the WOM_DONE callback
//   that increments it again.
// Blocks (polling every 10 ms) until at least one block is free before
//   advancing to the next block.
//
void writeAudio(HWAVEOUT hWaveOut, LPSTR data, int size)
{
    WAVEHDR* current = &waveBlocks[waveCurrentBlock];

    // Nothing to queue
    if(data == NULL || size <= 0)
        return;

    // Never write past the end of the block's buffer
    if((DWORD)size > current->dwBufferLength)
        size = (int)current->dwBufferLength;

    // Unprepare before prepare
    if(current->dwFlags & WHDR_PREPARED)
        waveOutUnprepareHeader(hWaveOut, current, sizeof(WAVEHDR));

    // Copy data, silence whatever part of the block is left over
    memcpy(current->lpData, data, size);
    if((DWORD)size < current->dwBufferLength)
        memset(current->lpData + size, 0, current->dwBufferLength - size);

    // Prepare
    if(waveOutPrepareHeader(hWaveOut, current, sizeof(WAVEHDR)) != MMSYSERR_NOERROR)
        return;

    // Write; a block that was not queued is still free, keep the count as is
    if(waveOutWrite(hWaveOut, current, sizeof(WAVEHDR)) != MMSYSERR_NOERROR) {
        waveOutUnprepareHeader(hWaveOut, current, sizeof(WAVEHDR));
        return;
    }

    EnterCriticalSection(&waveCriticalSection);
    waveFreeBlockCount--;
    LeaveCriticalSection(&waveCriticalSection);

    // Wait for the device to hand a block back
    while(!waveFreeBlockCount)
        Sleep(10);

    // Advance to the next block, wrapping around
    waveCurrentBlock = (waveCurrentBlock + 1) % BLOCK_COUNT;
}

//
// One time block allocation
//   size  - data bytes per block
//   count - number of blocks
// A single zeroed heap allocation holds `count` WAVEHDR structures
//   followed by `count` data areas of `size` bytes; each header's lpData
//   points at its own data area and dwBufferLength is set to `size`.
// Returns the header array (release it with freeBlocks), or NULL when
//   size/count is not positive, the total size overflows, or the heap
//   allocation fails.
//
WAVEHDR* allocateBlocks(int size, int count)
{
    if(size <= 0 || count <= 0)
        return NULL;

    // Total bytes, guarded against wrap-around
    const size_t perBlock = (size_t)size + sizeof(WAVEHDR);
    if(perBlock > ((size_t)-1) / (size_t)count)
        return NULL;
    const size_t totalBufferSize = perBlock * (size_t)count;

    // allocate memory for the entire set in one go
    char* buffer = (char*)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, totalBufferSize);
    if(buffer == NULL)
        return NULL;

    // headers come first, the data areas follow them
    WAVEHDR* blocks = (WAVEHDR*)buffer;
    buffer += sizeof(WAVEHDR) * count;

    // and set up the pointers to each bit
    for(int i = 0; i < count; i++)
    {
        blocks[i].dwBufferLength = size;
        blocks[i].lpData = buffer;
        blocks[i].dwFlags = 0;
        buffer += size;
    }
    return blocks;
}

// Hands the block array back to the process heap.
// blockArray is passed to HeapFree unchanged, so it is expected to be
//   the pointer allocateBlocks returned.
void freeBlocks(WAVEHDR* blockArray)
{
    HANDLE processHeap = GetProcessHeap();
    HeapFree(processHeap, 0, blockArray);
}

//
// Constructor
// Creates the per channel/part events
// Allocates all buffers
// Creates filter taps/states
// Spawns threads last, 4 filtering, 1 demod/combo/audio, so they only
//   ever see fully initialized members
//
AudioStream::AudioStream()
{
	programRunning = true;
	bufferedData = 0;

	// One procDone/dataReady event pair per (channel, part)
	for(int i = 0; i < 2; i++)
		for(int j = 0; j < 2; j++) {
			procDoneEvent[i][j] = CreateEventExA(NULL, NULL, EVENT_FLAGS, EVENT_ACCESS);
			dataReadyEvent[i][j] = CreateEventExA(NULL, NULL, EVENT_FLAGS, EVENT_ACCESS);
		}

	// Two input buffers, one is filled while the other is processed
	buf[0] = ippsMalloc_32f(INLEN);
	buf[1] = ippsMalloc_32f(INLEN);
	inBuf = buf[0];
	procBuf = buf[1];

	firTaps = ippsMalloc_32f(FIRTAPSLEN);

	// Stage buffers, one set per (channel, part)
	for(int i = 0; i < 2; i++)
		for(int j = 0; j < 2; j++) {
			ddc[i][j] = ippsMalloc_32f(DDCLEN);
			// ippsMalloc does not clear memory; start with an all-zero
			//   mixing table so that data processed before the first
			//   setChannelFreq call comes out silent instead of being
			//   multiplied by garbage
			ippsZero_32f(ddc[i][j], DDCLEN);

			iir[i][j] = ippsMalloc_32f(IIRLEN);
			fir[i][j] = ippsMalloc_32f(FIRLEN);
			demodIn[i][j] = ippsMalloc_32f(DEMODLEN);
		}

	// Per channel demod and audio buffers
	for(int i = 0; i < 2; i++) {
		demodOut[i] = ippsMalloc_32f(DEMODLEN);
		channelOut[i] = ippsMalloc_32f(OUTLEN);
	}

	// Interleaved 16 bit samples handed to the audio device
	audioIn = ippsMalloc_16s(8192);

	// Generate low-pass iir taps, fc = 0.0025, 0.2 ripple, order 3
	// An order N design has 2*(N+1) taps, 8 here
	Ipp64f iirTapTemp[8] = {0};
	ippsIIRGenLowpass_64f(0.0025, 0.2, 3, iirTapTemp, ippChebyshev1);
	ippsConvert_64f32f(iirTapTemp, iirTaps, 8);

	for(int i = 0; i < 2; i++)
		for(int j = 0; j < 2; j++)
			ippsIIRInitAlloc_32f(&iirState[i][j], iirTaps, 3, NULL);

	// Generate low-pass fir taps and state, cutoff 0.03125,
	//   FIRTAPSLEN taps, Blackman window
	Ipp64f* firTapTemp = ippsMalloc_64f(FIRTAPSLEN);
	ippsFIRGenLowpass_64f(0.03125, firTapTemp, FIRTAPSLEN, ippWinBlackman, ippTrue);
	ippsConvert_64f32f(firTapTemp, firTaps, FIRTAPSLEN);
	ippsFree(firTapTemp);

	for(int i = 0; i < 2; i++)
		for(int j = 0; j < 2; j++)
			ippsFIRInitAlloc_32f(&firState[i][j], firTaps, FIRTAPSLEN, NULL);

	// Audio low-pass, fc = 0.02, 0.2 ripple, order 3
	Ipp64f alpTemp[8] = {0};
	ippsIIRGenLowpass_64f(0.02, .2, 3, alpTemp, ippChebyshev1);
	ippsConvert_64f32f(alpTemp, alpTaps, 8);
	ippsIIRInitAlloc_32f(&audioLowPass, alpTaps, 3, NULL);

	// FM de-emphasis low-pass, fc = 0.0033, order 1
	// Only 4 taps are generated for order 1; the array is zeroed so the
	//   8 element conversion below never reads indeterminate values
	Ipp64f fmdTemp[8] = {0};
	ippsIIRGenLowpass_64f(0.0033, .2, 1, fmdTemp, ippButterworth);
	ippsConvert_64f32f(fmdTemp, fmdTaps, 8);
	ippsIIRInitAlloc_32f(&fmDeemphasis, fmdTaps, 1, NULL);

	DWORD tid; // Throw-away thread ID

	// Create our four processing threads
	threads[0] = CreateThread(NULL, 0, threadEntry,
		(void*)new tParam(this, LEFT, REAL), 0, &tid);
	threads[1] = CreateThread(NULL, 0, threadEntry,
		(void*)new tParam(this, LEFT, IMAG), 0, &tid);
	threads[2] = CreateThread(NULL, 0, threadEntry,
		(void*)new tParam(this, RIGHT, REAL), 0, &tid);
	threads[3] = CreateThread(NULL, 0, threadEntry,
		(void*)new tParam(this, RIGHT, IMAG), 0, &tid);

	// And the demod/audio thread
	demodHandle = CreateThread(NULL, 0, demodEntry, (void*)this, 0, &tid);
}

//
// Destructor
// Clears programRunning, signals every event so waiting threads wake
//   up, gives each thread up to 250 ms to exit and terminates those
//   that did not, then closes all handles and frees all IPP buffers
//   and filter states
//
AudioStream::~AudioStream()
{
	// Thread loops test this flag
	programRunning = false;

	// Wake every thread that is blocked on one of our events
	for(int c = 0; c < 2; c++) {
		for(int p = 0; p < 2; p++) {
			SetEvent(procDoneEvent[c][p]);
			SetEvent(dataReadyEvent[c][p]);
		}
	}

	// Give the workers, then the demod thread, 250 ms each
	DWORD workerWait[4];
	for(int t = 0; t < 4; t++)
		workerWait[t] = WaitForSingleObject(threads[t], 250);
	DWORD demodWait = WaitForSingleObject(demodHandle, 250);

	// Whoever did not finish in time is terminated
	for(int t = 0; t < 4; t++) {
		if(workerWait[t] == WAIT_TIMEOUT || workerWait[t] == WAIT_FAILED)
			TerminateThread(threads[t], 0);
	}
	if(demodWait == WAIT_TIMEOUT || demodWait == WAIT_FAILED)
		TerminateThread(demodHandle, 0);

	// Thread handles
	for(int t = 0; t < 4; t++)
		CloseHandle(threads[t]);
	CloseHandle(demodHandle);

	// Event handles, no thread uses them any more
	for(int c = 0; c < 2; c++) {
		for(int p = 0; p < 2; p++) {
			CloseHandle(procDoneEvent[c][p]);
			CloseHandle(dataReadyEvent[c][p]);
		}
	}

	// Buffers; the [channel][part] sets are walked part-major
	ippsFree(buf[0]);
	ippsFree(buf[1]);

	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsFree(ddc[c][p]);

	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsFree(iir[c][p]);

	ippsFree(firTaps);

	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsFree(fir[c][p]);

	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsFree(demodIn[c][p]);

	for(int c = 0; c < 2; c++)
		ippsFree(demodOut[c]);

	for(int c = 0; c < 2; c++)
		ippsFree(channelOut[c]);

	ippsFree(audioIn);

	// Filter states
	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsIIRFree_32f(iirState[c][p]);

	for(int p = 0; p < 2; p++)
		for(int c = 0; c < 2; c++)
			ippsFIRFree_32f(firState[c][p]);

	ippsIIRFree_32f(audioLowPass);
	ippsIIRFree_32f(fmDeemphasis);
}

//
// Public API function, bufferData
// Appends RETSIZE floats from f to the input buffer. Once the input
//   buffer holds INLEN samples the buffer pointers are swapped, the
//   samples that did not fit start the new input buffer, and the four
//   processing threads are signalled through dataReadyEvent.
//
void AudioStream::bufferData(float *f)
{
	int end = bufferedData + RETSIZE;

	if(end < INLEN) {                                   // All can move into buffer
		ippsMove_32f(f, inBuf+bufferedData, RETSIZE);   // Move it
		bufferedData = end;                             // Update bufferedData index
		return;
	}

	// Filled the buffer
	// Everything up to and including slot INLEN-1 goes in first; leaving
	//   that last slot out would hand the workers a stale sample
	int toFirst = INLEN - bufferedData;                 // Get how much goes in first
	int rest = RETSIZE - toFirst;                       // Get how much goes in second
	ippsMove_32f(f, inBuf+bufferedData, toFirst);       // Move first
	swapBufPtr();                                       // Change buffer pointers
	if(rest > 0)                                        // Nothing left on an exact fit
		ippsCopy_32f(f+toFirst, inBuf, rest);           // Move rest into new holding buffer
	bufferedData = rest;                                // Set new buf index

	// Tell every (channel, part) worker a full buffer is waiting
	for(int i = 0; i < 2; i++)
		for(int j = 0; j < 2; j++)
			SetEvent(dataReadyEvent[i][j]);
}

//
// Public API function to set the "station" of a speaker/channel
//   f - station frequency, in Hz
//   c - channel whose mixing tables are rewritten
// Fills ddc[c][REAL] with a sine and ddc[c][IMAG] with a cosine of
//   DDCLEN points each, at a per-sample frequency derived from f.
// NOTE(review): the constants 20e6, 108e6, 80000000 and 0.125 presumably
//   describe the tuner/sampling setup - confirm against the front end
//
void AudioStream::setChannelFreq(float f, Channel c)
{
	float offset = 20.0e6f-(108.0e6f-f);
	float freq = (offset/80000000) + 0.125f;

	float* realTable = ddc[c][REAL];
	float* imagTable = ddc[c][IMAG];

	for(int n = 0; n < DDCLEN; ++n) {
		realTable[n] = sin(n * freq * TWO_PI);
		imagTable[n] = cos(n * freq * TWO_PI);
	}
}

//
// Thread Entry
// param is the tParam the constructor allocated with new for this
//   thread. Its fields are copied out and the struct is deleted here,
//   as nothing else holds the pointer, then processThread is run for
//   that channel/part. The thread's exit code is processThread's result.
//
DWORD WINAPI AudioStream::threadEntry(LPVOID param)
{
	tParam* tp = (tParam*)param;

	AudioStream* stream = tp->as;
	Channel channel = tp->c;
	Part part = tp->p;

	// This thread is the sole owner of its start parameters
	delete tp;

	return stream->processThread(channel, part);
}

//
// Demod thread entry
// param is the AudioStream (this*) the thread belongs to; the thread
//   runs its demodThread and exits with that function's result
//
DWORD WINAPI AudioStream::demodEntry(LPVOID param)
{
	return ((AudioStream*)param)->demodThread();
}

//
// Main Processing Threads
// Each of 4 threads responsible for a channel(left/right) and a part(real/imag)
// Per pass: wait for dataReadyEvent[c][p], mix procBuf with the ddc
//   table, IIR filter and downsample by 25, FIR filter and downsample
//   by 5 into demodIn[c][p], then signal procDoneEvent[c][p].
// Returns 0 once programRunning is cleared.
//
DWORD AudioStream::processThread(Channel c, Part p)
{
	int dsLen, phase = 0;

	// Main loop
	while(programRunning) {

		WaitForSingleObject(dataReadyEvent[c][p], INFINITE);
		ResetEvent(dataReadyEvent[c][p]);

		// The destructor sets this event only to wake us up; leave at
		//   once instead of spending a full pass on data nobody will use
		if(!programRunning)
			break;

		// Downconvert to our channels frequency
		for(int i = 0; i < INLEN / DDCLEN; i++)
			ippsMul_32f(procBuf+(i*DDCLEN), ddc[c][p], iir[c][p]+(i*DDCLEN), DDCLEN);

		// Our IIR filter, with downsample of factor 25
		phase = 24;
		ippsIIR_32f_I(iir[c][p], IIRLEN, iirState[c][p]);
		ippsSampleDown_32f(iir[c][p], IIRLEN, fir[c][p], &dsLen, 25, &phase);

		// Our FIR filter, with downsample of factor 5
		phase = 4;
		ippsFIR_32f_I(fir[c][p], FIRLEN, firState[c][p]);
		ippsSampleDown_32f(fir[c][p], FIRLEN, demodIn[c][p], &dsLen, 5, &phase);

		// Let the demod thread know this channel/part is ready
		SetEvent(procDoneEvent[c][p]);
	}
	return 0;
}

//
// Demod Thread loop
// 1) Waits for downsampled data from all four processing threads
// 2) fm demodulates, filters and downsamples both channels
// 3) Interleaves the results as 16 bit stereo
// 4) Feeds the results to the audio device
// On exit (programRunning cleared, or the device could not be opened)
//   everything acquired here is released again: wave blocks, critical
//   section and the device itself.
// Always returns 0.
//
DWORD AudioStream::demodThread(void)
{
    HWAVEOUT hWaveOut; // device handle
    WAVEFORMATEX wfx;

    // Stereo frames per pass; audioIn holds 2 shorts per frame
    const int frames = 4096;
    const int audioBytes = frames * 2 * (int)sizeof(short);

    waveBlocks = allocateBlocks(BLOCK_SIZE, BLOCK_COUNT);
    if(waveBlocks == NULL)
        return 0;
    waveFreeBlockCount = BLOCK_COUNT;
    waveCurrentBlock = 0;
    InitializeCriticalSection(&waveCriticalSection);

    // set up the WAVEFORMATEX structure.
    wfx.nSamplesPerSec = 32000;
    wfx.wBitsPerSample = 16;         // sample size
    wfx.nChannels = 2;               // channels
    wfx.cbSize = 0;                  // size of _extra_ info
    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nBlockAlign = (wfx.wBitsPerSample*wfx.nChannels) >> 3;
    wfx.nAvgBytesPerSec = wfx.nBlockAlign*wfx.nSamplesPerSec;

    // Try to open the default wave device. WAVE_MAPPER is
    //   a constant defined in mmsystem.h, it always points to the
    //   default wave device on the system (some people have 2 or
    //   more sound cards).
    // Callback and instance are pointer sized values, so they are cast
    //   to DWORD_PTR rather than DWORD.
    if( waveOutOpen(
        &hWaveOut,
        WAVE_MAPPER,
        &wfx,
        (DWORD_PTR)waveOutProc,
        (DWORD_PTR)&waveFreeBlockCount,
        CALLBACK_FUNCTION) != MMSYSERR_NOERROR)
    {
        MessageBox(NULL,
            "Error: opening sound card failed",
            "ERROR",
            MB_OK);

        // No device, give back what was set up above
        DeleteCriticalSection(&waveCriticalSection);
        freeBlocks(waveBlocks);
        waveBlocks = NULL;
        return 0;
    }

    while(programRunning) {
        // Wait for all processing to complete
        for(int i = 0; i < 2; i++)
            for(int j = 0; j < 2; j++)
                WaitForSingleObject(procDoneEvent[i][j], INFINITE);

        // The destructor sets these events to wake us up for shutdown
        if(!programRunning)
            break;

        // Reset all our processing events right away: writeAudio below
        //   can block, and resetting afterwards could erase a completion
        //   signalled for the next pass in the meantime
        for(int i = 0; i < 2; i++)
            for(int j = 0; j < 2; j++)
                ResetEvent(procDoneEvent[i][j]);

        demodAndScale(LEFT);
        demodAndScale(RIGHT);

        // Interleave two channels
        for(int i = 0; i < frames; i++) {
            audioIn[i*2]   = (short)(10000*channelOut[LEFT][i]);
            audioIn[i*2+1] = (short)(10000*channelOut[RIGHT][i]);
        }

        // Push out audio
        writeAudio(hWaveOut, (char*)audioIn, audioBytes);
    }

    // Stop playback; the device returns every queued block
    waveOutReset(hWaveOut);

    // Headers must be unprepared before their memory is released
    for(int i = 0; i < BLOCK_COUNT; i++)
        if(waveBlocks[i].dwFlags & WHDR_PREPARED)
            waveOutUnprepareHeader(hWaveOut, &waveBlocks[i], sizeof(WAVEHDR));

    // Close the device first so the callback can no longer run, then
    //   drop the lock it uses and the blocks
    waveOutClose(hWaveOut);
    DeleteCriticalSection(&waveCriticalSection);
    freeBlocks(waveBlocks);
    waveBlocks = NULL;

    return 0;
}

//
// Demodulate one channel
// 1) Phase of (demodIn[c][REAL], demodIn[c][IMAG]) into demodOut[c]
// 2) Sample to sample phase difference, wrapped back by TWO_PI when it
//    leaves the -PI..PI range
// 3) Audio low-pass, then FM de-emphasis low-pass
// 4) Downsample by 20 into channelOut[c]
// NOTE(review): the phase reference restarts at 0 on every call, so the
//   first difference of a call is not relative to the previous call's
//   last sample - confirm this is acceptable
// NOTE(review): audioLowPass and fmDeemphasis are the same filter state
//   objects for whichever channel is passed in - confirm sharing them
//   between LEFT and RIGHT is intended
//
void AudioStream::demodAndScale(Channel c)
{
	float* samples = demodOut[c];

	ippsPhase_32f(demodIn[c][REAL], demodIn[c][IMAG], samples, DEMODLEN);

	// Replace each phase with its step from the previous phase
	float previous = 0.0f;
	for(int n = 0; n < DEMODLEN; ++n) {
		const float current = samples[n];
		float step = current - previous;

		if(step > PI)
			step -= TWO_PI;
		else if(step < -PI)
			step += TWO_PI;

		previous = current;
		samples[n] = step;
	}

	// Two low pass filters to clean up the audio
	// One for maintaining nyquist, the other for FM De-emphasis
	ippsIIR_32f_I(samples, DEMODLEN, audioLowPass);
	ippsIIR_32f_I(samples, DEMODLEN, fmDeemphasis);

	// Keep every 20th sample
	int outLen;
	int dsPhase = 0;
	ippsSampleDown_32f(samples, DEMODLEN, channelOut[c], &outLen, 20, &dsPhase);
}